/***
This do-file builds the county-level median 2-bedroom rent dataset (ACS 2014-2018
5-year estimates, table B25031) from the zipped CSV downloaded from the Census.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root through -project-; when no project build is running
* (r(buildrunning)==0), pull in the interactive configuration instead
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Make sure the output folders exist (-capture- ignores any mkdir error,
* e.g. when the folder is already there)
local acs_derived "${root}/data/derived/ACS 2014-2018 5-Year County"
capture mkdir "`acs_derived'"
capture mkdir "`acs_derived'/Individual Variables"

*-------------------------------------------------------------------------------
* Load and clean raw data
*-------------------------------------------------------------------------------

* Work inside the derived folder: -unzipfile- extracts into the current
* working directory, so the CSV lands next to the other individual variables
local indiv_dir "${root}/data/derived/ACS 2014-2018 5-Year County/Individual Variables"
cd "`indiv_dir'"

* Register the downloaded zip archive with -project-, then extract it.
* The path is defined once so the registration and the extraction always
* point at the same file.
local raw_zip "${root}/data/dvc/ACS 2014-2018 5-Year County/Median 2BR Rent/ACSDT5Y2018.B25031_2020-08-21T132840.zip"
project, uses("`raw_zip'") raw
unzipfile "`raw_zip'", replace

* Register the extracted CSV with -project- and import it, taking variable
* names from the first line of the file
local acs_csv "`indiv_dir'/ACSDT5Y2018.B25031_data_with_overlays_2020-08-21T132657.csv"
project, uses("`acs_csv'")
import delimited "`acs_csv'", varnames(1) clear

* Remove sections of the geo_id variable other than the county FIPS
* (i.e. strip the "0500000US" prefix)
replace geo_id = subinstr(geo_id, "0500000US", "", .)

* Rename geographic variables for clarity
rename geo_id county_fips
rename name county_name

* Every data row must carry a 5-character county FIPS. Row 1 is excluded
* from the check because it contains variable descriptions, not data.
assert strlen(county_fips) == 5 if _n > 1

* Drop the row containing descriptions of variables
drop in 1

* Keep only the county identifiers and the median 2BR rent variable
rename b25031_004e med_2br_2014_2018_est
keep county_fips county_name med_2br_2014_2018_est

* "-" marks a missing estimate in the raw file; blank it out so that
* -destring- turns it into a numeric missing value
replace med_2br_2014_2018_est = "" if med_2br_2014_2018_est == "-"

* Convert the FIPS code and the rent estimate to numeric
destring county_fips, replace
destring med_2br_2014_2018_est, replace

* -destring- without -force- does not error on non-numeric characters: it
* only prints a note and leaves the variable as a string. Stop here rather
* than silently saving string-typed variables.
confirm numeric variable county_fips med_2br_2014_2018_est

*-------------------------------------------------------------------------------
* Save file
*-------------------------------------------------------------------------------

* Write the cleaned dataset and register it with -project- as an output of
* this do-file; the path is defined once so both commands refer to the same file
local out_dta "${root}/data/derived/ACS 2014-2018 5-Year County/Individual Variables/ACS 2014-2018 Median 2BR Rent.dta"
save "`out_dta'", replace
project, creates("`out_dta'")